library(tidyverse)  # ggplot(), %>%, mutate(), and friends 
library(broom)
library(MatchIt)  # Match things
library(Rcpp)
library(MASS)
library(modelsummary)
library(IRdisplay)
library(haven)
library(dplyr)
library(jtools)
library(car)
library(margins)
library(marginaleffects)
library(cobalt)
library(ggplot2)

# set directory



library(janitor)


##################### IMPORT NIBRS
nibrs <- readRDS('C:/./nibrs_prepared_dataset_91_20_days.rds')

nibrs <- clean_names(nibrs)
nibrs$race_of_victim_black <- as.character(nibrs$race_of_victim_black)
nibrs$incident_date <- as.POSIXct(nibrs$incident_date) # otherwise margins won't work

# final cleaning
nibrs$agency_indicator[nibrs$agency_indicator == '5'] <- "other agencies"
nibrs$vic_relation[is.na(nibrs$vic_relation)] = "relationship unknown"

# check whether means are statistically significant via t-test
with(nibrs, t.test(days_to_arrest ~ race_of_victim_black))


# histogram 
ggplot(nibrs, aes(x = days_to_arrest)) +
  geom_histogram(fill = "white", colour = "black", bins=100) +
  facet_grid(race_of_victim_black ~ .)



# desc stats
desc <- nibrs %>%
  group_by(race_of_victim_black) %>%
  summarise(
    count = n(),
    mean_days_to_arrest = mean(days_to_arrest, na.rm = TRUE),
    median_days_to_arrest = median(days_to_arrest, na.rm = TRUE),
    sd_days_to_arrest = sd(days_to_arrest, na.rm = TRUE),
    min_days_to_arrest = min(days_to_arrest, na.rm = TRUE),
    max_days_to_arrest = max(days_to_arrest, na.rm = TRUE)
  )


#################################################################
####################### MAIN MODELS #############################
#################################################################



############## NIBRS (all with agency-level info and states)
m.out3ni <- matchit(race_of_victim_black ~ total_offender_segments+total_victims + 
                      sex_of_victim+
                      agecat+
                      decade+
                      agency_indicator+
                      monthly_agency_overlap +
                      state_x,
                    data=nibrs,
                    method = "exact",
                    estimand="ATT")

bal.tab(m.out3ni)
# 


matched_dataset3ni <- match.data(m.out3ni)



# modeling (unadjusted and adjusted)


#ggpredict(model_matched3ni, terms="race_of_victim_black") %>% plot()



#adjusted
model_matched3ni <- glm(days_to_arrest ~
                          race_of_victim_black + total_offender_segments+total_victims + 
                          sex_of_victim+
                          agecat+
                          decade+
                          weapon+
                          circumstance+
                          agency_indicator+
                          factor(monthly_agency_overlap) +
                          state_x+
                          population_group+
                          location_type,
                        family=quasipoisson, data = matched_dataset3ni, weights=weights)

tidy(model_matched3ni)





# get odds ratios and 95% cis
summ(model_matched3ni, exp = TRUE, robust=TRUE, digits=3)

ame_3ni_matched<-comparisons(model_matched3ni, variables = "race_of_victim_black",vcov = "HC3",
                               newdata = subset(matched_dataset3ni, race_of_victim_black == 1)) |>
  summary()
ame_3ni_matched$dataset <- 'NIBRS (1991-2020)'
ame_3ni_matched$model <- 'Adjusted/Matched'



######### unmatched
model_unmatched3ni <- glm(days_to_arrest ~
                            race_of_victim_black + total_offender_segments+total_victim_segments + 
                            sex_of_victim+
                            agecat+
                            decade+
                            weapon+
                            circumstance+
                            agency_indicator+
                            factor(monthly_agency_overlap) +
                            state_x+
                            population_group+
                            location_type,
                          family=poisson, data = nibrs)

summ(model_unmatched3ni, exp = TRUE, robust=TRUE, digits=3)
# GENERAL ATT
ame_3ni_unmatched<-comparisons(model_unmatched3ni, variables = "race_of_victim_black",vcov = "HC3",
                               newdata = subset(nibrs, race_of_victim_black == 1)) |>
  summary()
ame_3ni_unmatched$dataset <- 'NIBRS (1991-2020)'
ame_3ni_unmatched$model <- 'Adjusted/Unmatched'






############### PLOTTING
# plotting (Figure on national AMEs)

ame_list <- do.call("rbind", list(ame_3ni_matched, ame_3ni_unmatched ))
ame_list <- ame_list[order(ame_list$dataset),]
ame_list$dataset<- fct_inorder(ame_list$dataset)
ame_list$model = factor(ame_list$model, levels=c("Adjusted/Matched", "Adjusted/Unmatched"))
# plot
dodge <- position_dodge(width=0.5)
ame_plot <- ggplot(ame_list, x=factor(ame_list$dataset, levels=unique(ame_list$dataset)), y=estimate, color=model, aes(ymin=-6,ymax=6))+
  geom_point(aes(x=factor(ame_list$dataset, levels=unique(ame_list$dataset)), y=estimate,shape=model, color=model), size=2.5, position=dodge)+
  scale_y_continuous(breaks =c(-6, -4,-2, 0, 2, 4, 6))+
  geom_errorbar(aes(x=dataset, ymin=conf.low, ymax=conf.high, group=model, color=model), width=0.1,position=dodge, name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched"))+
  scale_color_manual(values=c("Adjusted/Matched"="red","Adjusted/Unmatched"="blue"), name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched"))+
  scale_shape_manual(values=c("Adjusted/Matched"=19,"Adjusted/Unmatched"= 15), name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched" ))+
  labs(x= "Dataset", y = "Average Marginal Effect (95% C.I.)")+
  theme_bw()+
  geom_hline(aes(yintercept=0), colour="#990000", linetype="dashed")+
  coord_flip()+theme(axis.text.y = element_text(size=12)) + 
  theme(axis.text.x = element_text(size=12), axis.title=element_text(size=16))+
  #theme(text = element_text(family = "Times New Roman"))+
  theme(legend.title=element_text(size=14), legend.key.size=unit(0.5, 'cm'), legend.text = element_text(size=14))+theme(legend.position="bottom")+
  guides(colour=guide_legend(nrow=1,byrow=TRUE))

